# coding:utf8

# Import the Spark-related packages
from operator import add
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StringType, IntegerType
# 2. Top 10 most-visited websites (wan)
if __name__ == '__main__':
    # Script entry point: load the tab-separated log from HDFS, count rows
    # per distinct `wzh` value, keep the 10 largest counts, and store that
    # result in the MySQL table `find_top_find`.
    spark = SparkSession.builder.appName("xixix_1").getOrCreate()
    try:
        # 1. Read the data set.
        # Every column is read as a nullable string; the file has no header
        # row, so the column names come only from this schema.
        # NOTE(review): `wzh` is presumably the visited site/URL (the file
        # header comment mentions "websites") -- confirm against the data.
        log_schema = (
            StructType()
            .add("time", StringType(), nullable=True)
            .add("id", StringType(), nullable=True)
            .add("contain", StringType(), nullable=True)
            .add("ts1", StringType(), nullable=True)
            .add("ts2", StringType(), nullable=True)
            .add("wzh", StringType(), nullable=True)
        )
        df = (
            spark.read.format("csv")
            .option("sep", "\t")
            .option("header", False)
            .option("encoding", "utf-8")
            .schema(log_schema)
            .load("hdfs://node1:8020/input/sougou.csv")
        )

        # 2. Expose the DataFrame to Spark SQL.
        # getOrCreate() may hand back an already-running session in which a
        # "sougou" view is registered; createTempView() would raise in that
        # case, so replace the view instead.
        df.createOrReplaceTempView("sougou")

        # 3. Aggregate: row count per `wzh`, highest 10 counts first.
        top_sites = spark.sql("""
            SELECT wzh,count(*) as n FROM sougou GROUP BY wzh ORDER BY n desc limit 10
        """)

        # 4. Persist the result over JDBC; "overwrite" mode replaces whatever
        # the target table currently holds.
        # NOTE(review): the database credentials are hard-coded here; consider
        # loading them from configuration or the environment instead.
        (
            top_sites.write.mode("overwrite")
            .format("jdbc")
            .option("url", "jdbc:mysql://node1:3306/bigdata?useSSL=false&useUnicode=true")
            .option("dbtable", "find_top_find")
            .option("user", "root")
            .option("password", "123456")
            .save()
        )
    finally:
        # Release the session's resources whether the job succeeded or failed.
        spark.stop()
